library(tidyverse)
library(foreign)
library(ggpubr)
library(mclogit)

#
#
#2008 election precinct election results
#
#
# Precinct-level 2008 general election returns. Rows whose SRPREC is
# "CNTYTOT" (presumably county-total rows -- confirm against the data
# dictionary) are removed so only precinct records are aggregated.
elec_08_precinct <- read.dbf(
  "control_data/2008 election results/state_g08_sov_data_by_g08_srprec.DBF"
)
elec_08_precinct <- elec_08_precinct |>
  filter(SRPREC != "CNTYTOT") |>
  select(COUNTY, COUNTY_COD, SRPREC, SRPREC_KEY, PRSDEM, TOTVOTE)

# Crosswalk from precincts to cities
precinct_city_crosswalk <- read.dbf(
  "control_data/2008 election results/state_g08_srprec_to_city.dbf"
)

# City-level share: PRSDEM summed over a city's precincts divided by the
# summed TOTVOTE. The merge is a natural join on whatever columns the two
# tables share. City names are lower-cased to serve as the PLACE key, and
# only the first row per PLACE is kept.
results_08 <- merge(elec_08_precinct, precinct_city_crosswalk) |>
  group_by(CITY) |>
  summarise(dem_share_08 = sum(PRSDEM) / sum(TOTVOTE)) |>
  mutate(PLACE = tolower(CITY)) |>
  select(PLACE, dem_share_08) |>
  distinct(PLACE, .keep_all = TRUE)


# California candidate data. load() restores the objects saved in the file;
# the code below expects one of them to be named `california`.
load("california.Rds")

# Attach the 2008 city-level share as dem_share_08. all.x = TRUE keeps every
# california row, leaving dem_share_08 NA where no city matched on PLACE.
california <- merge(california, results_08, by = "PLACE", all.x = TRUE)

# Correlation between the main-paper dem_share and the 2008 share, using only
# rows where both are present (original author's note: "correlation
# coefficient of 94.3", presumably 0.943 on the correlation scale).
cor(california$dem_share, california$dem_share_08, use = "complete.obs")




#
#now run same processes as main paper
#
#(raw data display but using dem_share_08 instead)
# Overwrite dem_share so that everything below, which reads `dem_share`,
# runs on the 2008 share instead.
california$dem_share <- california$dem_share_08

candidates <- california
winners <- california |> filter(ELECTED == 1)

# Career labels shown in the two rows of the figure.
# NOTE: `rep` shadows the name of base::rep(). Calls written as rep(x, n)
# still reach the function, because R skips non-function objects when it
# looks up a name in call position.
rep <- c("Business Employee", "Military or Law Enforcement", "Business Owner/Executive")
dem <- c("Service Based Professional", "Lawyer", "Non-Profit Worker")

#main approach
# One row for every PLACE x label combination, so that labels with no
# candidates in a city can later be given a share of zero. expand_grid()
# builds the cross product from the labels actually present, instead of
# assuming there are exactly nine of them.
graph_data <- expand_grid(
  PLACE = unique(california$PLACE),
  label = unique(california$label)
) |>
  arrange(PLACE) |>
  as.data.frame()



#candidates
# Share of each PLACE's candidates in each career label: the count per
# PLACE x label (dem_share and total_population ride along as grouping
# columns) divided by the PLACE's total number of candidates.
gd_cand <- candidates |>
  ungroup() |>
  count(PLACE, dem_share, label, total_population) |>
  merge(count(ungroup(candidates), PLACE, name = "n_tot"), by = "PLACE") |>
  mutate(prop = n / n_tot)

# Left-join onto the full PLACE x label grid. Combinations with no candidates
# get prop = 0, and dem_share / total_population are copied from the other
# rows of the same PLACE.
gd_cand <- merge(graph_data, gd_cand, by = c("PLACE", "label"), all.x = TRUE) |>
  mutate(prop = replace_na(prop, 0)) |>
  group_by(PLACE) |>
  fill(dem_share, total_population, .direction = "downup") |>
  ungroup()


#which tile (decile) and its average proportion by label
tiles_cand <- gd_cand |>
  group_by(PLACE) |>
  summarise(dem_share = mean(dem_share)) |>
  mutate(tiles = ntile(dem_share, 10))

# Join the decile back on PLACE only. The previous natural merge also matched
# on the floating-point dem_share column, which silently drops rows whenever
# the per-PLACE mean is not bit-identical to the row value.
avg_by_decile_cand <- gd_cand |>
  merge(select(tiles_cand, PLACE, tiles), by = "PLACE") |>
  group_by(label, tiles) |>
  summarise(cat_mean = mean(prop),
            dem_share_mean = mean(dem_share),
            .groups = "drop_last")





#winners
# Share of each PLACE's winners in each career label: the count per
# PLACE x label (dem_share and total_population ride along as grouping
# columns) divided by the PLACE's total number of winners.
gd_win <- winners |>
  ungroup() |>
  count(PLACE, dem_share, label, total_population) |>
  merge(count(ungroup(winners), PLACE, name = "n_tot"), by = "PLACE") |>
  mutate(prop = n / n_tot)

# Left-join onto the full PLACE x label grid. Combinations with no winners
# get prop = 0, and dem_share / total_population are copied from the other
# rows of the same PLACE. A PLACE with no winners at all keeps NA in both.
gd_win <- merge(graph_data, gd_win, by = c("PLACE", "label"), all.x = TRUE) |>
  mutate(prop = replace_na(prop, 0)) |>
  group_by(PLACE) |>
  fill(dem_share, total_population, .direction = "downup") |>
  ungroup()


#which tile (decile) and its average proportion by label
tiles_win <- gd_win |>
  group_by(PLACE) |>
  summarise(dem_share = mean(dem_share)) |>
  mutate(tiles = ntile(dem_share, 10))

# Join the decile back on PLACE only. The previous natural merge also matched
# on the floating-point dem_share column, which silently drops rows whenever
# the per-PLACE mean is not bit-identical to the row value.
avg_by_decile_win <- gd_win |>
  merge(select(tiles_win, PLACE, tiles), by = "PLACE") |>
  group_by(label, tiles) |>
  summarise(cat_mean = mean(prop),
            dem_share_mean = mean(dem_share),
            .groups = "drop_last")



#plots
# Scatter of per-city label shares with the decile means overlaid.
#   city_shares   one row per PLACE x label with columns dem_share, prop, label
#   decile_means  one row per label x decile with dem_share_mean, cat_mean, label
#   keep_labels   career labels to show
#   palette       ColorBrewer palette name for the colour scale
#   title, x_title, y_title   plot and axis titles
#   markers       named numeric vector: name = text label, value = x position
#                 of a dashed reference line
#   y_max         upper limit of the y axis (points above it are dropped)
#   x_percent     format the x axis as percentages?
# Returns the ggplot object.
plot_label_shares <- function(city_shares, decile_means, keep_labels, palette,
                              title, x_title, y_title, markers,
                              y_max = 0.25, x_percent = TRUE) {
  shares <- city_shares |> filter(label %in% keep_labels)
  means <- decile_means |> filter(label %in% keep_labels)

  p <- ggplot(shares,
              aes(x = dem_share, y = prop, color = label, shape = label)) +
    geom_point(alpha = 0.2) +
    geom_point(data = means,
               mapping = aes(x = dem_share_mean, y = cat_mean, color = label),
               size = 4) +
    geom_line(data = means,
              mapping = aes(x = dem_share_mean, y = cat_mean, color = label)) +
    theme_bw() +
    # A single y scale: a separate ylim() call would only be replaced by this
    # one (with a "Scale for y is already present" message).
    scale_y_continuous(labels = scales::percent, limits = c(0, y_max))

  if (x_percent) {
    p <- p + scale_x_continuous(labels = scales::percent)
  }

  p <- p +
    labs(title = title, x = x_title, y = y_title,
         color = "Category", shape = "Category") +
    scale_color_brewer(palette = palette) +
    theme(
      legend.title = element_text(size = 18),
      legend.text = element_text(size = 18),
      axis.text = element_text(size = 12, colour = "black"),
      axis.title = element_text(size = 18, colour = "black"),
      plot.title = element_text(size = 18, colour = "black"),
      panel.background = element_rect(fill = "white")
    )

  # annotate() draws each text label once; geom_text() with constant
  # aesthetics would redraw it for every row of the plot data.
  for (city in names(markers)) {
    p <- p +
      geom_vline(xintercept = markers[[city]], linetype = "dashed") +
      annotate("text", x = markers[[city]], y = 0.22, label = city,
               angle = 90, vjust = -0.7, colour = "black", size = 4)
  }
  p
}

# Reference lines for two cities. The x positions are hard-coded and carried
# over unchanged.
# NOTE(review): confirm these are the intended positions on this section's
# x variable (the 2008 share).
city_markers <- c(Berkeley = 0.9591331, Bakersfield = 0.4543273)

r_candidates_plot <- plot_label_shares(
  gd_cand, avg_by_decile_cand, rep, "Set1",
  title = "All Candidates", x_title = "Democratic Voteshare",
  y_title = "Share Candidates", markers = city_markers
)

r_winners_plot <- plot_label_shares(
  gd_win, avg_by_decile_win, rep, "Set1",
  title = "Only Election Winners", x_title = "Democratic Voteshare",
  y_title = "Share Winners", markers = city_markers
)

rep_careers <- ggarrange(r_candidates_plot, r_winners_plot,
                         common.legend = TRUE, legend = "bottom")

rep_careers

d_candidates_plot <- plot_label_shares(
  gd_cand, avg_by_decile_cand, dem, "Set2",
  title = "All Candidates", x_title = "Democratic Voteshare",
  y_title = "Share Candidates", markers = city_markers
)

d_winners_plot <- plot_label_shares(
  gd_win, avg_by_decile_win, dem, "Set2",
  title = "Only Election Winners", x_title = "Democratic Voteshare",
  y_title = "Share Winners", markers = city_markers
)

dem_careers <- ggarrange(d_candidates_plot, d_winners_plot,
                         common.legend = TRUE, legend = "bottom")

# Keep the combined figure in a variable and hand it to ggsave() explicitly,
# rather than relying on whatever ggplot2 last recorded as last_plot().
election08_figure <- ggarrange(rep_careers, dem_careers, nrow = 2)
election08_figure

ggsave(filename = "figures/app_election08_rawdata.png", plot = election08_figure,
       height = 12, width = 12, bg = "white")



#
#
#tausanovitch and warshaw ideological scores 
#
#
# Start again from the saved California data (discarding the 2008 overrides)
load("california.Rds")
tw <- read_csv("control_data/taus_war_scores.csv")

# California rows only. Characters 2-6 of place_fips are kept -- presumably
# this strips a state digit so the code matches california$place_fips;
# confirm against both files.
tw_ca <- tw |>
  filter(State == "CA") |>
  mutate(place_fips = substr(place_fips, 2, 6)) |>
  select(place_fips, mrp_ideology)

# Natural inner join: california rows without a score are dropped.
california <- merge(california, tw_ca)

# Flip the sign and standardise. scale() returns a one-column matrix, so it
# is converted back to a plain numeric vector; storing the matrix would put a
# matrix column into the data frame and carry it through the later
# group_by() / merge() / fill() steps.
california$mrp_ideology <- as.numeric(scale(-1 * california$mrp_ideology))

#
#now run same processes as main paper
#
#(raw data display but using tw score instead)
# Overwrite dem_share so that everything below, which reads `dem_share`,
# runs on the standardised ideology score instead.
california$dem_share <- california$mrp_ideology

candidates <- california
winners <- california |> filter(ELECTED == 1)

# Career labels shown in the two rows of the figure.
# NOTE: `rep` shadows the name of base::rep(). Calls written as rep(x, n)
# still reach the function, because R skips non-function objects when it
# looks up a name in call position.
rep <- c("Business Employee", "Military or Law Enforcement", "Business Owner/Executive")
dem <- c("Service Based Professional", "Lawyer", "Non-Profit Worker")

#main approach
# One row for every PLACE x label combination, so that labels with no
# candidates in a city can later be given a share of zero. expand_grid()
# builds the cross product from the labels actually present, instead of
# assuming there are exactly nine of them.
graph_data <- expand_grid(
  PLACE = unique(california$PLACE),
  label = unique(california$label)
) |>
  arrange(PLACE) |>
  as.data.frame()



#candidates
# Share of each PLACE's candidates in each career label: the count per
# PLACE x label (dem_share and total_population ride along as grouping
# columns) divided by the PLACE's total number of candidates.
gd_cand <- candidates |>
  ungroup() |>
  count(PLACE, dem_share, label, total_population) |>
  merge(count(ungroup(candidates), PLACE, name = "n_tot"), by = "PLACE") |>
  mutate(prop = n / n_tot)

# Left-join onto the full PLACE x label grid. Combinations with no candidates
# get prop = 0, and dem_share / total_population are copied from the other
# rows of the same PLACE.
gd_cand <- merge(graph_data, gd_cand, by = c("PLACE", "label"), all.x = TRUE) |>
  mutate(prop = replace_na(prop, 0)) |>
  group_by(PLACE) |>
  fill(dem_share, total_population, .direction = "downup") |>
  ungroup()


#which tile (decile) and its average proportion by label
tiles_cand <- gd_cand |>
  group_by(PLACE) |>
  summarise(dem_share = mean(dem_share)) |>
  mutate(tiles = ntile(dem_share, 10))

# Join the decile back on PLACE only. The previous natural merge also matched
# on the floating-point dem_share column, which silently drops rows whenever
# the per-PLACE mean is not bit-identical to the row value.
avg_by_decile_cand <- gd_cand |>
  merge(select(tiles_cand, PLACE, tiles), by = "PLACE") |>
  group_by(label, tiles) |>
  summarise(cat_mean = mean(prop),
            dem_share_mean = mean(dem_share),
            .groups = "drop_last")





#winners
# Share of each PLACE's winners in each career label: the count per
# PLACE x label (dem_share and total_population ride along as grouping
# columns) divided by the PLACE's total number of winners.
gd_win <- winners |>
  ungroup() |>
  count(PLACE, dem_share, label, total_population) |>
  merge(count(ungroup(winners), PLACE, name = "n_tot"), by = "PLACE") |>
  mutate(prop = n / n_tot)

# Left-join onto the full PLACE x label grid. Combinations with no winners
# get prop = 0, and dem_share / total_population are copied from the other
# rows of the same PLACE. A PLACE with no winners at all keeps NA in both.
gd_win <- merge(graph_data, gd_win, by = c("PLACE", "label"), all.x = TRUE) |>
  mutate(prop = replace_na(prop, 0)) |>
  group_by(PLACE) |>
  fill(dem_share, total_population, .direction = "downup") |>
  ungroup()


#which tile (decile) and its average proportion by label
tiles_win <- gd_win |>
  group_by(PLACE) |>
  summarise(dem_share = mean(dem_share)) |>
  mutate(tiles = ntile(dem_share, 10))

# Join the decile back on PLACE only. The previous natural merge also matched
# on the floating-point dem_share column, which silently drops rows whenever
# the per-PLACE mean is not bit-identical to the row value.
avg_by_decile_win <- gd_win |>
  merge(select(tiles_win, PLACE, tiles), by = "PLACE") |>
  group_by(label, tiles) |>
  summarise(cat_mean = mean(prop),
            dem_share_mean = mean(dem_share),
            .groups = "drop_last")



#plots
# Scatter of per-city label shares with the decile means overlaid.
#   city_shares   one row per PLACE x label with columns dem_share, prop, label
#   decile_means  one row per label x decile with dem_share_mean, cat_mean, label
#   keep_labels   career labels to show
#   palette       ColorBrewer palette name for the colour scale
#   title, x_title, y_title   plot and axis titles
#   markers       named numeric vector: name = text label, value = x position
#                 of a dashed reference line
#   y_max         upper limit of the y axis (points above it are dropped)
#   x_percent     format the x axis as percentages?
# Returns the ggplot object.
plot_label_shares <- function(city_shares, decile_means, keep_labels, palette,
                              title, x_title, y_title, markers,
                              y_max = 0.25, x_percent = TRUE) {
  shares <- city_shares |> filter(label %in% keep_labels)
  means <- decile_means |> filter(label %in% keep_labels)

  p <- ggplot(shares,
              aes(x = dem_share, y = prop, color = label, shape = label)) +
    geom_point(alpha = 0.2) +
    geom_point(data = means,
               mapping = aes(x = dem_share_mean, y = cat_mean, color = label),
               size = 4) +
    geom_line(data = means,
              mapping = aes(x = dem_share_mean, y = cat_mean, color = label)) +
    theme_bw() +
    # A single y scale: a separate ylim() call would only be replaced by this
    # one (with a "Scale for y is already present" message).
    scale_y_continuous(labels = scales::percent, limits = c(0, y_max))

  if (x_percent) {
    p <- p + scale_x_continuous(labels = scales::percent)
  }

  p <- p +
    labs(title = title, x = x_title, y = y_title,
         color = "Category", shape = "Category") +
    scale_color_brewer(palette = palette) +
    theme(
      legend.title = element_text(size = 18),
      legend.text = element_text(size = 18),
      axis.text = element_text(size = 12, colour = "black"),
      axis.title = element_text(size = 18, colour = "black"),
      plot.title = element_text(size = 18, colour = "black"),
      panel.background = element_rect(fill = "white")
    )

  # annotate() draws each text label once; geom_text() with constant
  # aesthetics would redraw it for every row of the plot data.
  for (city in names(markers)) {
    p <- p +
      geom_vline(xintercept = markers[[city]], linetype = "dashed") +
      annotate("text", x = markers[[city]], y = 0.22, label = city,
               angle = 90, vjust = -0.7, colour = "black", size = 4)
  }
  p
}

# Reference lines for two cities on the standardised score axis. The x
# positions are hard-coded and carried over unchanged.
# NOTE(review): they will go stale if the scaling sample changes.
city_markers <- c(Berkeley = 3.33, Bakersfield = -1.19)
tw_axis_title <- "Tausanovitch and Warshaw (2014) Score"

# The x axis is a standardised score, so it is not formatted as a percentage.
r_candidates_plot <- plot_label_shares(
  gd_cand, avg_by_decile_cand, rep, "Set1",
  title = "All Candidates", x_title = tw_axis_title,
  y_title = "Share Candidates", markers = city_markers, x_percent = FALSE
)

r_winners_plot <- plot_label_shares(
  gd_win, avg_by_decile_win, rep, "Set1",
  title = "Only Election Winners", x_title = tw_axis_title,
  y_title = "Share Winners", markers = city_markers, x_percent = FALSE
)

rep_careers <- ggarrange(r_candidates_plot, r_winners_plot,
                         common.legend = TRUE, legend = "bottom")

rep_careers

d_candidates_plot <- plot_label_shares(
  gd_cand, avg_by_decile_cand, dem, "Set2",
  title = "All Candidates", x_title = tw_axis_title,
  y_title = "Share Candidates", markers = city_markers, x_percent = FALSE
)

d_winners_plot <- plot_label_shares(
  gd_win, avg_by_decile_win, dem, "Set2",
  title = "Only Election Winners", x_title = tw_axis_title,
  y_title = "Share Winners", markers = city_markers, x_percent = FALSE
)

dem_careers <- ggarrange(d_candidates_plot, d_winners_plot,
                         common.legend = TRUE, legend = "bottom")

# Keep the combined figure in a variable and hand it to ggsave() explicitly,
# rather than relying on whatever ggplot2 last recorded as last_plot().
tauswar_figure <- ggarrange(rep_careers, dem_careers, nrow = 2)
tauswar_figure

ggsave(filename = "figures/app_tauswar_rawdata.png", plot = tauswar_figure,
       height = 12, width = 12, bg = "white")




#
#
#look at frequencies by year
#
#
# Start again from the saved California data
load("california.Rds")

# Share of all candidates in each career label, per year. YEAR is converted
# via character so that a factor YEAR yields its printed value rather than
# its internal level code.
label_share_by_year <- california |>
  mutate(YEAR = as.numeric(as.character(YEAR))) |>
  group_by(YEAR, label) |>
  summarise(n = n(), .groups = "drop") |>
  group_by(YEAR) |>
  mutate(prop = n / sum(n)) |>
  ungroup()

overtime_plot <- ggplot(label_share_by_year,
                        aes(x = YEAR, y = prop, color = label)) +
  geom_point(size = 2) +
  geom_smooth(linewidth = 2, se = FALSE) +
  theme_bw() +
  labs(color = "Career Category", x = "Year", y = "Proportion of all candidates") +
  scale_y_continuous(labels = scales::percent) +
  theme(
    legend.title = element_text(size = 18),
    legend.text = element_text(size = 14),
    axis.text = element_text(size = 14, colour = "black"),
    axis.title = element_text(size = 18, colour = "black"),
    plot.title = element_text(size = 18, colour = "black"),
    panel.background = element_rect(fill = "white"),
    legend.position = "bottom"
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE))
overtime_plot

# Pass the plot explicitly instead of relying on ggsave()'s last_plot() default.
ggsave(filename = "figures/app_overtime.png", plot = overtime_plot,
       height = 12, width = 12, bg = "white")
  



#
#
#California limited to only open seat elections  
#
#
# Start again from the saved California data
load("california.Rds")

# Keep only races (place_fips x YEAR x RACEID) in which no candidate has
# INCUMB == "Y". A race whose INCUMB values contain NA and no "Y" evaluates
# to NA here and is therefore dropped by filter() as well.
# ungroup() afterwards so the race-level grouping does not leak into the
# steps below.
california <- california |>
  group_by(place_fips, YEAR, RACEID) |>
  filter(!any(INCUMB == "Y")) |>
  ungroup()


candidates <- california
winners <- california |> filter(ELECTED == 1)

# Career labels shown in the two rows of the figure.
# NOTE: `rep` shadows the name of base::rep(). Calls written as rep(x, n)
# still reach the function, because R skips non-function objects when it
# looks up a name in call position.
rep <- c("Business Employee", "Military or Law Enforcement", "Business Owner/Executive")
dem <- c("Service Based Professional", "Lawyer", "Non-Profit Worker")

#main approach
# One row for every PLACE x label combination, so that labels with no
# candidates in a city can later be given a share of zero. expand_grid()
# builds the cross product from the labels actually present after the
# open-seat filter, instead of assuming there are exactly nine of them.
graph_data <- expand_grid(
  PLACE = unique(california$PLACE),
  label = unique(california$label)
) |>
  arrange(PLACE) |>
  as.data.frame()



#candidates
# Share of each PLACE's candidates in each career label: the count per
# PLACE x label (dem_share and total_population ride along as grouping
# columns) divided by the PLACE's total number of candidates.
# ungroup() first: `candidates` may still carry the race-level grouping from
# the open-seat filter, which count() would otherwise add to the result.
gd_cand <- candidates |>
  ungroup() |>
  count(PLACE, dem_share, label, total_population) |>
  merge(count(ungroup(candidates), PLACE, name = "n_tot"), by = "PLACE") |>
  mutate(prop = n / n_tot)

# Left-join onto the full PLACE x label grid. Combinations with no candidates
# get prop = 0, and dem_share / total_population are copied from the other
# rows of the same PLACE.
gd_cand <- merge(graph_data, gd_cand, by = c("PLACE", "label"), all.x = TRUE) |>
  mutate(prop = replace_na(prop, 0)) |>
  group_by(PLACE) |>
  fill(dem_share, total_population, .direction = "downup") |>
  ungroup()


#which tile (decile) and its average proportion by label
tiles_cand <- gd_cand |>
  group_by(PLACE) |>
  summarise(dem_share = mean(dem_share)) |>
  mutate(tiles = ntile(dem_share, 10))

# Join the decile back on PLACE only. The previous natural merge also matched
# on the floating-point dem_share column, which silently drops rows whenever
# the per-PLACE mean is not bit-identical to the row value.
avg_by_decile_cand <- gd_cand |>
  merge(select(tiles_cand, PLACE, tiles), by = "PLACE") |>
  group_by(label, tiles) |>
  summarise(cat_mean = mean(prop),
            dem_share_mean = mean(dem_share),
            .groups = "drop_last")





#winners
# Share of each PLACE's winners in each career label: the count per
# PLACE x label (dem_share and total_population ride along as grouping
# columns) divided by the PLACE's total number of winners.
# ungroup() first: `winners` may still carry the race-level grouping from
# the open-seat filter, which count() would otherwise add to the result.
gd_win <- winners |>
  ungroup() |>
  count(PLACE, dem_share, label, total_population) |>
  merge(count(ungroup(winners), PLACE, name = "n_tot"), by = "PLACE") |>
  mutate(prop = n / n_tot)

# Left-join onto the full PLACE x label grid. Combinations with no winners
# get prop = 0, and dem_share / total_population are copied from the other
# rows of the same PLACE. A PLACE with no winners at all keeps NA in both.
gd_win <- merge(graph_data, gd_win, by = c("PLACE", "label"), all.x = TRUE) |>
  mutate(prop = replace_na(prop, 0)) |>
  group_by(PLACE) |>
  fill(dem_share, total_population, .direction = "downup") |>
  ungroup()


#which tile (decile) and its average proportion by label
tiles_win <- gd_win |>
  group_by(PLACE) |>
  summarise(dem_share = mean(dem_share)) |>
  mutate(tiles = ntile(dem_share, 10))

# Join the decile back on PLACE only. The previous natural merge also matched
# on the floating-point dem_share column, which silently drops rows whenever
# the per-PLACE mean is not bit-identical to the row value.
# Places whose dem_share is NA receive an NA decile; those rows are removed.
avg_by_decile_win <- gd_win |>
  merge(select(tiles_win, PLACE, tiles), by = "PLACE") |>
  group_by(label, tiles) |>
  summarise(cat_mean = mean(prop),
            dem_share_mean = mean(dem_share),
            .groups = "drop_last") |>
  drop_na(tiles)



#plots
# Scatter of per-city label shares with the decile means overlaid.
#   city_shares   one row per PLACE x label with columns dem_share, prop, label
#   decile_means  one row per label x decile with dem_share_mean, cat_mean, label
#   keep_labels   career labels to show
#   palette       ColorBrewer palette name for the colour scale
#   title, x_title, y_title   plot and axis titles
#   markers       named numeric vector: name = text label, value = x position
#                 of a dashed reference line
#   y_max         upper limit of the y axis (points above it are dropped)
#   x_percent     format the x axis as percentages?
# Returns the ggplot object.
plot_label_shares <- function(city_shares, decile_means, keep_labels, palette,
                              title, x_title, y_title, markers,
                              y_max = 0.25, x_percent = TRUE) {
  shares <- city_shares |> filter(label %in% keep_labels)
  means <- decile_means |> filter(label %in% keep_labels)

  p <- ggplot(shares,
              aes(x = dem_share, y = prop, color = label, shape = label)) +
    geom_point(alpha = 0.2) +
    geom_point(data = means,
               mapping = aes(x = dem_share_mean, y = cat_mean, color = label),
               size = 4) +
    geom_line(data = means,
              mapping = aes(x = dem_share_mean, y = cat_mean, color = label)) +
    theme_bw() +
    # A single y scale: a separate ylim() call would only be replaced by this
    # one (with a "Scale for y is already present" message).
    scale_y_continuous(labels = scales::percent, limits = c(0, y_max))

  if (x_percent) {
    p <- p + scale_x_continuous(labels = scales::percent)
  }

  p <- p +
    labs(title = title, x = x_title, y = y_title,
         color = "Category", shape = "Category") +
    scale_color_brewer(palette = palette) +
    theme(
      legend.title = element_text(size = 18),
      legend.text = element_text(size = 18),
      axis.text = element_text(size = 12, colour = "black"),
      axis.title = element_text(size = 18, colour = "black"),
      plot.title = element_text(size = 18, colour = "black"),
      panel.background = element_rect(fill = "white")
    )

  # annotate() draws each text label once; geom_text() with constant
  # aesthetics would redraw it for every row of the plot data.
  for (city in names(markers)) {
    p <- p +
      geom_vline(xintercept = markers[[city]], linetype = "dashed") +
      annotate("text", x = markers[[city]], y = 0.22, label = city,
               angle = 90, vjust = -0.7, colour = "black", size = 4)
  }
  p
}

# Reference lines for two cities; x positions are hard-coded.
city_markers <- c(Berkeley = 0.9591331, Bakersfield = 0.4543273)

# The open-seat panels use a y axis up to 30%. The earlier ylim(0, .25) was
# overridden by the later scale_y_continuous(limits = c(0, .3)), so 0.3 is
# the limit that was actually in effect.
r_candidates_plot <- plot_label_shares(
  gd_cand, avg_by_decile_cand, rep, "Set1",
  title = "All Candidates", x_title = "Democratic Voteshare",
  y_title = "Share Candidates", markers = city_markers, y_max = 0.3
)

r_winners_plot <- plot_label_shares(
  gd_win, avg_by_decile_win, rep, "Set1",
  title = "Only Election Winners", x_title = "Democratic Voteshare",
  y_title = "Share Winners", markers = city_markers, y_max = 0.3
)

rep_careers <- ggarrange(r_candidates_plot, r_winners_plot,
                         common.legend = TRUE, legend = "bottom")

rep_careers

d_candidates_plot <- plot_label_shares(
  gd_cand, avg_by_decile_cand, dem, "Set2",
  title = "All Candidates", x_title = "Democratic Voteshare",
  y_title = "Share Candidates", markers = city_markers, y_max = 0.3
)

d_winners_plot <- plot_label_shares(
  gd_win, avg_by_decile_win, dem, "Set2",
  title = "Only Election Winners", x_title = "Democratic Voteshare",
  y_title = "Share Winners", markers = city_markers, y_max = 0.3
)

dem_careers <- ggarrange(d_candidates_plot, d_winners_plot,
                         common.legend = TRUE, legend = "bottom")

# Keep the combined figure in a variable and hand it to ggsave() explicitly,
# rather than relying on whatever ggplot2 last recorded as last_plot().
openseat_figure <- ggarrange(rep_careers, dem_careers, nrow = 2)
openseat_figure
ggsave(filename = "figures/app_openseat_rawdata.png", plot = openseat_figure,
       height = 12, width = 12, bg = "white")



#
#
#national sample model separated in nonpartisan cities only 
#
#
# National sample; the code below expects load() to restore an object named
# `national_sample`.
load("national.Rds")
tw_ideo <- read_csv("control_data/taus_war_scores.csv") |>
  mutate(City = tolower(City))

##big cities by ideology score
# Natural join of the national sample with the ideology scores (on whatever
# columns the two tables share, City among them), then:
#  - standardise the score; as.numeric() turns the one-column matrix that
#    scale() returns back into a plain numeric column
#  - make place_fips character and left-pad 6-character codes with "0"
#  - overwrite the codes for Louisville and Honolulu. The replacement values
#    are written as strings so the column stays character without relying on
#    ifelse()'s implicit numeric-to-character coercion.
bigcities <- national_sample |>
  mutate(City = tolower(City), label = label_1) |>
  merge(tw_ideo) |>
  mutate(
    mrp_ideology = as.numeric(scale(mrp_ideology)),
    place_fips = as.character(place_fips),
    place_fips = ifelse(nchar(place_fips) == 6,
                        paste0("0", place_fips),
                        place_fips),
    place_fips = ifelse(City == "louisville", "2148006", place_fips),
    place_fips = ifelse(City == "honolulu", "1571550", place_fips)
  )
##get census data for these cities
# Place-level demographic controls from the NHGIS extract. The key is the
# state code pasted onto the place code. Every share below uses AON4E001
# (kept as total_population) as its denominator.
# NOTE(review): column meanings are taken from the output names; confirm
# against the NHGIS codebook.
censusdata <- read_csv("control_data/nhgis0008_csv/nhgis0008_ds254_20215_place.csv")
censusdata <- censusdata |>
  mutate(
    place_fips = paste0(STATEA, PLACEA),
    total_population = AON4E001,
    ln_population = log(total_population),
    white = AON5E002 / total_population,
    black = AON5E003 / total_population,
    asian = AON5E005 / total_population,
    hispanic = AOODE003 / total_population,
    bachelors = (AOP8E019 + AOP8E020 + AOP8E021 + AOP8E022 +
                   AOP8E023 + AOP8E024 + AOP8E025) / total_population,
    income = AOQIE001
  ) |>
  select(place_fips, total_population, ln_population,
         white, black, asian, hispanic, bachelors, income)


###occupation
# Place-level occupation and class-of-worker shares from the NHGIS extract,
# each expressed as a fraction of APGYE001 (kept as `total`).
# NOTE(review): category meanings are taken from the output names; confirm
# against the NHGIS codebook.
occupationdata <- read_csv("control_data/nhgis0009_csv/nhgis0009_ds255_20215_place.csv")
occupationdata <- occupationdata |>
  mutate(
    place_fips = paste0(STATEA, PLACEA),
    total = APGYE001,
    # occupation groups (a transport share is deliberately left out)
    management_cen = APGYE002 / total,
    service_cen = APGYE003 / total,
    sales_cen = APGYE004 / total,
    construction_cen = APGYE005 / total,
    # employer type (a private-company share is deliberately left out)
    selfemploy_cen = (APGYE013 + APGYE031) / total,
    nonprofit_cen = APGYE019 / total,
    govt_cen = APGYE025 / total
  ) |>
  select(place_fips, total,
         management_cen, service_cen, sales_cen, construction_cen,
         selfemploy_cen, nonprofit_cen, govt_cen)


# Combine the two census tables, then attach them to the city sample. Both
# are natural inner joins on the columns the inputs share.
us_cen_final <- merge(censusdata, occupationdata)

bigcities_final <- merge(bigcities, us_cen_final)


# Drop the listed cities, leaving the sample used for the nonpartisan-only
# model (presumably these are the cities with partisan elections -- confirm).
np_bigcities_final <- bigcities_final |>
  filter(
    !City %in% c("baltimore", "buffalo", "charlotte", "baton rouge", "fort wayne",
                 "indianapolis", "jacksonville", "louisville",
                 "new york city", "new orleans", "philadelphia", "pittsburgh",
                 "tucson", "winston-salem")
  )

#nonpartisan
#model
# Baseline-category (multinomial) logit of career label on the ideology score
# plus demographic and occupational controls, fitted on the filtered sample.
np_model_national <- mblogit(
  factor(label) ~ mrp_ideology + ln_population +
    white + log(income) + black + asian + hispanic + bachelors +
    management_cen + service_cen + sales_cen + construction_cen +
    selfemploy_cen + nonprofit_cen + govt_cen,
  data = np_bigcities_final
)

# Prediction grid: the ideology score runs from -2.5 to 2.5 in steps of 0.25
# while every control is held at its median.
# NOTE(review): the medians come from bigcities_final (all cities), not from
# the filtered np_bigcities_final the model was fitted on -- confirm this is
# intended.
ndata_n <- with(
  bigcities_final,
  data.frame(
    mrp_ideology = seq(-2.5, 2.5, by = 0.25),
    ln_population = median(ln_population),
    white = median(white),
    black = median(black),
    asian = median(asian),
    hispanic = median(hispanic),
    bachelors = median(bachelors),
    income = median(income),
    management_cen = median(management_cen),
    service_cen = median(service_cen),
    sales_cen = median(sales_cen),
    construction_cen = median(construction_cen),
    selfemploy_cen = median(selfemploy_cen),
    nonprofit_cen = median(nonprofit_cen),
    govt_cen = median(govt_cen)
  )
)



# Predicted category shares and their standard errors over the grid.
# predict() is evaluated once and both components are reused.
np_pred <- predict(np_model_national, newdata = ndata_n,
                   se.fit = TRUE, type = "response")

# Long format: one row per grid point x category. Every column except the
# score is pivoted, so the number of categories is not hard-coded.
np_fit_long <- np_pred$fit |>
  data.frame() |>
  mutate(mrp_ideology = ndata_n$mrp_ideology) |>
  pivot_longer(cols = -mrp_ideology, names_to = "cat", values_to = "share")

# Same row order as np_fit_long (grid point, then category), so the two can
# be bound column-wise.
np_se_long <- np_pred$se.fit |>
  data.frame() |>
  pivot_longer(cols = everything(), names_to = "cat", values_to = "se") |>
  select(se)

# data.frame() replaced spaces and punctuation in the category names with
# dots; turn the dots back into spaces.
np_n_prediction <- cbind(np_fit_long, np_se_long) |>
  mutate(cat = str_replace_all(cat, "\\.", " "))






# Predicted shares for four selected categories against the ideology score.
# The score is sign-flipped for display (mrp_i_flip), and the ribbon shows
# share +/- 1.96 standard errors.
np_n_plot <- np_n_prediction |>
  filter(cat %in% c("Politician or Staff Member",
                    "Business Owner Executive",
                    "Non Profit Worker",
                    "Military or Law Enforcement")) |>
  mutate(mrp_i_flip = -1 * mrp_ideology) |>
  ggplot(aes(x = mrp_i_flip, y = share, color = cat)) +
  geom_point(aes(shape = cat), size = 2) +
  geom_smooth() +
  geom_ribbon(
    aes(ymin = share - (1.96 * se),
        ymax = share + (1.96 * se),
        color = cat,
        fill = cat),
    alpha = 0.4
  ) +
  labs(x = "Scaled Ideology Score",
       y = "Share of City Council Members",
       color = "Category",
       fill = "Category",
       shape = "Category") +
  scale_shape_manual(
    values = c("Business Owner Executive" = 1,
               "Politician or Staff Member" = 2,
               "Military or Law Enforcement" = 3,
               "Non Profit Worker" = 4)
  ) +
  theme_minimal() +
  scale_y_continuous(labels = scales::percent) +
  theme(axis.text = element_text(size = 10),
        axis.title = element_text(size = 14),
        legend.text = element_text(size = 10),
        legend.title = element_text(size = 10),
        legend.position = "bottom",
        legend.direction = "horizontal")

np_n_plot
ggsave(filename = "figures/app_np_national_logit.png", plot = np_n_plot,
       width = 10, height = 8, bg = "white")


